
* Load the long-format polling dataset, replacing whatever is in memory.
use "LONG_MI_NATURE_20180111.dta", clear

* Exclude Japan (countryid 20) from everything that follows.
keep if countryid != 20

* Correct a coding error for Finland (countryid 13): set gov_ to 1 for the
* election whose elecdate is 18734.
* NOTE(review): 18734 is 17apr2011 if elecdate is a Stata daily date - confirm.
replace gov_ = 1 if countryid == 13 & elecdate == 18734

* Country-by-party string key: the two identifiers (value labels decoded where
* defined) joined by a single space. The data are left sorted on this key.
egen _couXpar_ = concat(countryid partyid), decode punct(" ")
sort _couXpar_

* ---------------------------------------------------------------------------
* Derived indicator variables and poll-error measures
* ---------------------------------------------------------------------------

* pres: 1 when election is coded "Presidential", 0 otherwise
gen pres = (election == "Presidential")

* pr: 1 when rule is coded "PR", 0 otherwise
gen pr = (rule == "PR")

* cand: 1 when espv is coded "Candidate", 0 otherwise
gen cand = (espv == "Candidate")

* enpp3: 1 when enpp exceeds 3, 0 otherwise.
* Stata orders missing values above every number, so an unguarded "enpp>3"
* is true for missing enpp and would silently code those rows as 1.
* Rows with missing enpp are left missing instead.
gen enpp3 = (enpp > 3) if !missing(enpp)

* large / small: complementary indicators split at a vote share of 20.
* Both stay missing when vote_ is missing (including extended missing codes).
gen large = (vote_ >= 20) if !missing(vote_)
gen small = (vote_ < 20)  if !missing(vote_)

* new: 1 when regime is "New", 0 when "Old", missing for any other value
gen new = cond(regime == "New", 1, cond(regime == "Old", 0, .))

* Signed errors: vote share minus poll share, for poll_ and for ipoll_
gen err_  = vote_ - poll_
gen ierr_ = vote_ - ipoll_

* Absolute errors for the same two poll series
gen mae  = abs(vote_ - poll_)
gen imae = abs(vote_ - ipoll_)

* MAE OF THE FINAL (RAW) POLLS
* Restrict the sample to polls taken at most 7 days before election day,
* excluding day 0. Rows with missing daysbeforeED are dropped as well, because
* Stata treats missing as larger than 7.
drop if daysbeforeED > 7 | daysbeforeED == 0

* String identifiers built from the listed variables (value labels decoded
* where defined), separated by single spaces.
egen _couXlegXparXele_ = concat(countryid election elecdate partyid), decode punct(" ")
egen _couXele_         = concat(countryid elecdate), decode punct(" ")
egen _couXlegXele_     = concat(countryid election elecdate), decode punct(" ")

* N OF POLLS (POLL-OF-POLLS) IN THE FINAL WEEK OF THE CAMPAIGN
* One table row per country x election type x election date x days-before key,
* counting only rows where mae is available.
egen _couXlegXeleXdays_ = concat(countryid election elecdate daysbeforeED), decode punct(" ")
tabulate _couXlegXeleXdays_ if mae != .

* NUMBER OF COUNTRIES (N = 32) / ELECTIONS (N = 220)
* Countries: all rows, then only rows with a non-missing mae.
tabulate countryid
tabulate countryid if mae != .
egen countries = group(countryid)
tabulate countries if mae != .

* Elections: same pair of tabulations on the election key and on its
* consecutive numeric id.
tabulate _couXlegXele_
tabulate _couXlegXele_ if mae != .
egen elections = group(_couXlegXele_)
tabulate elections
tabulate elections if mae != .

** -> ERROR ON THE LEAD
* Key for one election round: country x election type x election date x round
egen _couXlegXeleXrnd_ = concat(countryid election elecdate round), decode punct(" ")

* group : consecutive id over (country, election, date, round, vote_), so that
*         within a round a higher vote_ means a higher id
* group2: consecutive id over (country, election, date, round, daysbeforeED)
egen group  = group(countryid election elecdate round vote_)
egen group2 = group(countryid election elecdate round daysbeforeED)

* Rank parties within each round by vote share: rank 1 = highest vote_.
* bysort establishes the required sort order itself instead of relying on an
* earlier sort still being in place.
bysort _couXlegXeleXrnd_ (vote_): egen last = max(group)
gen rank = (last - group) + 1

* Vote shares of the first- and second-placed parties, spread to every row of
* the round
gen vote1 = vote_ if rank == 1
gen vote2 = vote_ if rank == 2
bysort _couXlegXeleXrnd_: egen vote1st = max(vote1)
bysort _couXlegXeleXrnd_: egen vote2nd = max(vote2)

* Poll shares of those same two parties, spread within each
* round x days-before-election cell (group2)
gen poll1 = poll_ if rank == 1
gen poll2 = poll_ if rank == 2
bysort group2: egen poll1st = max(poll1)
bysort group2: egen poll2nd = max(poll2)

* winner: 1 when the polls put the first-placed party ahead of the second.
* Stata orders missing above every number, so an unguarded "poll1st>poll2nd"
* is true whenever poll1st is missing. The indicator is therefore left missing
* when either poll value is unavailable.
gen winner = (poll1st > poll2nd) if !missing(poll1st, poll2nd)

* Lead of first over second place in the vote and in the polls, and the
* absolute difference between the two leads
gen votelead = vote1st - vote2nd
gen polllead = poll1st - poll2nd

gen abslead = abs(votelead - polllead)

** ODDS RATIO MEASURE OF POLLING BIAS (ARZHEIMER & EVANS)
* An : odds implied by the poll share divided by odds implied by the vote share
* A  : natural log of that ratio
* Bi : absolute value of A
generate An = (poll_/(100-poll_)*((100-vote_)/vote_))
generate A  = ln( (poll_/(100-poll_)*((100-vote_)/vote_)) )
generate Bi = abs(A)

* Same absolute log odds ratio, computed for the first- and second-placed
* parties, followed by the ratio of the two
generate A1st = abs(ln( (poll1st/(100-poll1st)*((100-vote1st)/vote1st)) ))
generate A2nd = abs(ln( (poll2nd/(100-poll2nd)*((100-vote2nd)/vote2nd)) ))

generate winnerBi = A1st/A2nd

* Lead-based measures: ratio of poll lead to vote lead, its absolute value,
* and the absolute log of the ratio. The log is missing whenever the ratio is
* zero, negative or undefined.
generate marginA  = (polllead/votelead)
generate marginBi = abs(marginA)
generate marginB  = abs(ln(polllead/votelead))

* B: mean of Bi within each country x election type x date x round x day cell
egen _couXlegXeleXrndXday_ = concat(countryid election elecdate round daysbeforeED), decode punct(" ")
bysort _couXlegXeleXrndXday_: egen B = mean(Bi)

* COLLAPSE DATA - ONE ROW PER ELECTION
* The by() key is _couXlegXele_ (country x election type x election date), so
* each listed variable becomes its mean over all rows of that election.
collapse (mean) abslead marginB                 ///
                pres pr cand enpp3 large new    ///
                gov_ yr countryid,              ///
         by(_couXlegXele_)

* POOLED ANALYSES - ERROR ON MARGIN, BY ELECTION *
* marginB models go to pollbi_all: the full specification starts the table
* (replace), the year-only model is appended as a further column (merge).
regress marginB pres pr cand enpp3 yr
outreg using pollbi_all, se bdec(2) sigsymb(*,**,***) starlevels(5 1 .1)   ///
    summstat(N \ r2 \ r2_a \ rmse)                                         ///
    summtitle(N \ R-squared \ Adjusted R-squared \ RMSE)                   ///
    ctitle("", Raw data) replace

regress marginB yr
outreg using pollbi_all, se bdec(2) sigsymb(*,**,***) starlevels(5 1 .1)   ///
    summstat(N \ r2 \ r2_a \ rmse)                                         ///
    summtitle(N \ R-squared \ Adjusted R-squared \ RMSE)                   ///
    ctitle("", Raw data) merge

* abslead models go to lead_all, using the same replace-then-merge pattern.
regress abslead pres pr cand enpp3 yr
outreg using lead_all, se bdec(2) sigsymb(*,**,***) starlevels(5 1 .1)     ///
    summstat(N \ r2 \ r2_a \ rmse)                                         ///
    summtitle(N \ R-squared \ Adjusted R-squared \ RMSE)                   ///
    ctitle("", Raw data) replace

regress abslead yr
outreg using lead_all, se bdec(2) sigsymb(*,**,***) starlevels(5 1 .1)     ///
    summstat(N \ r2 \ r2_a \ rmse)                                         ///
    summtitle(N \ R-squared \ Adjusted R-squared \ RMSE)                   ///
    ctitle("", Raw data) merge

* POOLED ANALYSIS LIMITED TO COUNTRIES WHERE DATA OVER TIME - TABLE 4 *
* Country codes: Aus (2), Can (6), Fra (14), Ger (15), Ire (18), Net (23), NZ (24), Nor (25), Por (29), Spa (35), UK (39), US (40), Den (43)
* The active filter below leaves out Por (29) and Spa (35). The earlier
* variant that included them is kept here for reference:
*keep if countryid==2 | countryid==6 | countryid==14 | countryid==15 | countryid==18 | countryid==23 | countryid==24 | countryid==25 | countryid==29 | countryid==35 | countryid==39 | countryid==40 | countryid==43
keep if inlist(countryid, 2, 6, 14, 15, 18, 23, 24, 25, 39, 40, 43)

* Keep elections from 1977 onwards. Rows with missing yr are retained, since
* missing compares as larger than any number.
keep if yr >= 1977

* POOLED ANALYSES - ERROR ON MARGIN, BY ELECTION *
* NOTE(review): the replace calls below write to pollbi_all and lead_all, the
* same files used by the full-sample regressions earlier in this script, so
* those earlier tables are overwritten. Confirm this is intended.
regress marginB pres pr cand enpp3 yr
outreg using pollbi_all, se bdec(2) sigsymb(*,**,***) starlevels(5 1 .1)   ///
    summstat(N \ r2 \ r2_a \ rmse)                                         ///
    summtitle(N \ R-squared \ Adjusted R-squared \ RMSE)                   ///
    ctitle("", Raw data) replace

regress marginB yr
outreg using pollbi_all, se bdec(2) sigsymb(*,**,***) starlevels(5 1 .1)   ///
    summstat(N \ r2 \ r2_a \ rmse)                                         ///
    summtitle(N \ R-squared \ Adjusted R-squared \ RMSE)                   ///
    ctitle("", Raw data) merge

regress abslead pres pr cand enpp3 yr
outreg using lead_all, se bdec(2) sigsymb(*,**,***) starlevels(5 1 .1)     ///
    summstat(N \ r2 \ r2_a \ rmse)                                         ///
    summtitle(N \ R-squared \ Adjusted R-squared \ RMSE)                   ///
    ctitle("", Raw data) replace

regress abslead yr
outreg using lead_all, se bdec(2) sigsymb(*,**,***) starlevels(5 1 .1)     ///
    summstat(N \ r2 \ r2_a \ rmse)                                         ///
    summtitle(N \ R-squared \ Adjusted R-squared \ RMSE)                   ///
    ctitle("", Raw data) merge


